import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

# Toy Multinomial Naive Bayes text classifier (the classic "Chinese/Japan"
# worked example): vectorize training sentences into token counts, fit the
# model, then classify one held-out sentence.
from sklearn.naive_bayes import MultinomialNB

# Training data: a 'words' column of space-separated tokens and a label
# column 'Y' (assumed schema — confirm against bayes_xinxi.txt).
df = pd.read_csv('bayes_xinxi.txt')
print(df)

# Token = a run of ASCII letters or CJK ideographs.
# FIX: the original pattern '[a-zA-Z|\u4e00-\u9fa5]+' had a '|' inside the
# character class; there it is a literal pipe (not alternation), so '|' was
# silently treated as a valid token character. Removed, and made raw.
tf = CountVectorizer(token_pattern=r'[a-zA-Z\u4e00-\u9fa5]+')

X = df['words']  # raw documents
Y = df['Y']      # class labels

# Learn the vocabulary and build a dense document-term count matrix.
X = tf.fit_transform(X).toarray()
# FIX: get_feature_names() was removed in scikit-learn 1.2; the supported
# spelling is get_feature_names_out().
print(tf.get_feature_names_out())
print(X)

# Vectorize one unseen document using the already-fitted vocabulary
# (transform, NOT fit_transform, so the training vocabulary is reused).
X_ = ["Chinese Chinese Chinese Tokyo Japan"]
X_ = tf.transform(X_).toarray()

model = MultinomialNB()
model.fit(X, Y)
# predict() already returns the class label(s) for the new document.
# FIX: the original additionally printed Y[model.predict(X_)], which
# re-indexes the label Series by the predicted label *values* — meaningless
# for integer labels and a KeyError for string labels — so it is dropped.
print(model.predict(X_))